Introduction

NBA 75th Anniversary Team.
library(tidymodels)
library(ISLR) # For the Smarket data set
library(ISLR2) # For the Bikeshare data set
library(discrim)
library(poissonreg)
library(corrr)
library(corrplot)
library(naniar)
library(klaR) # for naive bayes
tidymodels_prefer()

Import Data Set

library(readr)

# Number of leading rows of the raw file kept for the analysis.
n_players <- 2000L
# Columns kept for the analysis; `greatest_75_flag` is the Y/N flag
# tabulated after the missing-value filter.
model_cols <- c(
  "height", "weight", "season_exp", "position", "school", "country",
  "draft_round", "greatest_75_flag"
)

data <- read_csv("common_player_info.csv")
# head() stops at the last available row, so a file with fewer than
# `n_players` rows never yields row indices past the end of the data.
df <- head(data, n_players)[, model_cols]

# Visualise where values are missing before deciding how to handle them.
vis_miss(df)

# Keep only complete rows, then tabulate the outcome flag.
df <- drop_na(df)
table(df[["greatest_75_flag"]])
## 
##    N    Y 
## 1813   29
# Convert height from "feet-inches" strings (e.g. "6-10") to centimetres.
# The conversion runs only while the column is still character, so
# re-running this chunk does not re-parse values that are already numeric.
if (is.character(df$height)) {
  height_parts <- strsplit(df$height, "-", fixed = TRUE)
  # A value without a "-" has no second part, so its inches (and therefore
  # its height) become NA instead of being silently mis-parsed.
  feet <- as.numeric(vapply(height_parts, function(p) p[1L], character(1)))
  inches <- as.numeric(vapply(height_parts, function(p) p[2L], character(1)))
  # 12 inches per foot, 2.54 cm per inch.
  df$height <- 2.54 * (12 * feet + inches)
}

Visualization

# Pairwise correlations among the predictors: the outcome flag is dropped
# first, and correlate() discards the remaining non-numeric columns (see the
# message echoed below).
cor_lab <- correlate(select(df, -greatest_75_flag))
## Non-numeric variables removed from input: `position`, `school`, `country`, and `draft_round`
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'
# Plot the correlation data frame.
rplot(cor_lab)

From the correlation plot, height and weight are positively correlated with each other.

# Logistic Regression